From 11a008c7c55bcb0ac0959566125baa18f86a70f2 Mon Sep 17 00:00:00 2001 From: "kaf24@firebug.cl.cam.ac.uk" Date: Sat, 25 Feb 2006 20:07:28 +0100 Subject: [PATCH] Update Linux time IRQ handler to understand the new stolen/blocked cycle counts exported by Xen. This is based heavily on a patch from Rik van Riel, but extended to distinguish between idle/blocked cycles and stolen cycles. There is still stuff todo: 1. Xen should export the time values in shared memory, to save a hypercall on every time interrupt (even though that is only every 10ms, worst case). 2. As in s390, Xen's cputime_t should be measured at finer granularity than jiffies. Nanoseconds would be a better unit. 3. Break out the internals of the account_steal_time() interface so that we don't need to abuse it so wretchedly. Signed-off-by: Rik van Riel Signed-off-by: Keir Fraser --- .../arch/i386/kernel/time-xen.c | 93 ++++++++++++++++--- 1 file changed, 82 insertions(+), 11 deletions(-) diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c index a5c7a5960f..8a82b9ca61 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c +++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c @@ -48,6 +48,8 @@ #include #include #include +#include +#include #include #include @@ -70,6 +72,7 @@ #include #include +#include #if defined (__i386__) #include @@ -123,6 +126,10 @@ static u32 shadow_tv_version; static u64 processed_system_time; /* System time (ns) at last processing. */ static DEFINE_PER_CPU(u64, processed_system_time); +/* How much CPU time was spent blocked and how much was 'stolen'? */ +static DEFINE_PER_CPU(u64, processed_stolen_time); +static DEFINE_PER_CPU(u64, processed_blocked_time); + /* Must be signed, as it's compared with s64 quantities which can be -ve. */ #define NS_PER_TICK (1000000000LL/HZ) @@ -567,9 +574,10 @@ EXPORT_SYMBOL(profile_pc); irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { - s64 delta, delta_cpu; + s64 delta, delta_cpu, stolen, blocked; int i, cpu = smp_processor_id(); struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); + struct vcpu_runstate_info runstate; write_seqlock(&xtime_lock); @@ -611,21 +619,81 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) write_sequnlock(&xtime_lock); - /* - * Local CPU jiffy work. No need to hold xtime_lock, and I'm not sure - * if there is risk of deadlock if we do (since update_process_times - * may do scheduler rebalancing work and thus acquire runqueue locks). - */ - while (delta_cpu >= NS_PER_TICK) { - delta_cpu -= NS_PER_TICK; - per_cpu(processed_system_time, cpu) += NS_PER_TICK; - update_process_times(user_mode(regs)); - profile_tick(CPU_PROFILING, regs); + /* Obtain stolen/blocked cycles, if the hypervisor supports it. */ + if (HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, + cpu, &runstate) == 0) { + /* + * Account stolen ticks. + * HACK: Passing NULL to account_steal_time() + * ensures that the ticks are accounted as stolen. + */ + stolen = runstate.time[RUNSTATE_runnable] + + runstate.time[RUNSTATE_offline] - + per_cpu(processed_stolen_time, cpu); + if (unlikely(stolen < 0)) /* clock jitter */ + stolen = 0; + delta_cpu -= stolen; + if (unlikely(delta_cpu < 0)) { + stolen += delta_cpu; + delta_cpu = 0; + } + do_div(stolen, NS_PER_TICK); + per_cpu(processed_stolen_time, cpu) += stolen * NS_PER_TICK; + account_steal_time(NULL, (cputime_t)stolen); + + /* + * Account blocked ticks. + * HACK: Passing idle_task to account_steal_time() + * ensures that the ticks are accounted as idle/wait. + */ + blocked = runstate.time[RUNSTATE_blocked] - + per_cpu(processed_blocked_time, cpu); + if (unlikely(blocked < 0)) /* clock jitter */ + blocked = 0; + delta_cpu -= blocked; + if (unlikely(delta_cpu < 0)) { + blocked += delta_cpu; + delta_cpu = 0; + } + do_div(blocked, NS_PER_TICK); + per_cpu(processed_blocked_time, cpu) += blocked * NS_PER_TICK; + account_steal_time(idle_task(cpu), (cputime_t)blocked); + + per_cpu(processed_system_time, cpu) += + (stolen + blocked) * NS_PER_TICK; + } + + if (delta_cpu > 0) { + do_div(delta_cpu, NS_PER_TICK); + if (user_mode(regs)) + account_user_time(current, (cputime_t)delta_cpu); + else + account_system_time(current, HARDIRQ_OFFSET, + (cputime_t)delta_cpu); + per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK; } + run_local_timers(); + if (rcu_pending(cpu)) + rcu_check_callbacks(cpu, user_mode(regs)); + scheduler_tick(); + run_posix_cpu_timers(current); + return IRQ_HANDLED; } +static void init_missing_ticks_accounting(int cpu) +{ + struct vcpu_runstate_info runstate = { 0 }; + + HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, cpu, &runstate); + + per_cpu(processed_blocked_time, cpu) = runstate.time[RUNSTATE_blocked]; + per_cpu(processed_stolen_time, cpu) = + runstate.time[RUNSTATE_runnable] + + runstate.time[RUNSTATE_offline]; +} + /* not static: needed by APM */ unsigned long get_cmos_time(void) { @@ -814,6 +882,7 @@ void __init time_init(void) processed_system_time = per_cpu(shadow_time, 0).system_timestamp; per_cpu(processed_system_time, 0) = processed_system_time; + init_missing_ticks_accounting(0); update_wallclock(); @@ -891,6 +960,7 @@ void time_resume(void) processed_system_time = per_cpu(shadow_time, 0).system_timestamp; per_cpu(processed_system_time, 0) = processed_system_time; + init_missing_ticks_accounting(0); update_wallclock(); } @@ -909,6 +979,7 @@ void local_setup_timer(unsigned int cpu) /* Use cpu0 timestamp: cpu's shadow is not initialised yet. */ per_cpu(processed_system_time, cpu) = per_cpu(shadow_time, 0).system_timestamp; + init_missing_ticks_accounting(cpu); } while (read_seqretry(&xtime_lock, seq)); sprintf(timer_name[cpu], "timer%d", cpu); -- 2.30.2